Data Table

Designation Analysis

Column

Job Designation and Experience Level

Job Designation - Word Cloud

Job Designation and Salary

Salary Analysis

Column

Salary Distribution

Salary Distribution based on Company Size

Column

Median Salary by Experience Level and Remote Ratio

Salary Distribution based on Experience

Max & Min Salary Analysis

Column

Median Salary for Top 10 Highest Paid Job Titles

Bubble Chart of Job Title v/s Experience Level by Salary (Highest Paid)

Column

Median Salary for Top 10 Lowest Paid Job Titles

Bubble Chart of Job Title v/s Experience Level by Salary (Lowest Paid)

Work Year Analysis

Column

Scatter Plot of Median Salary by Year

Remote Ratio by Work Year

Column

Percentage Distribution of Work Years

Company Location by Work Year

Location Analysis

Column

Median Salary by Company Location

Top Employee Residence countries and their company location

---
title: "Data Science Salaries Dashboard"
output: 
  flexdashboard::flex_dashboard:
    # Charts are arranged in columns and the page scrolls vertically.
    orientation: columns
    # Embed this document's source code in the rendered dashboard.
    source_code: embed
    vertical_layout: scroll
    theme: flatly
    # NOTE(review): absolute path on the author's machine; rendering on
    # another machine presumably needs the icon at this exact location.
    logo: "C:/Users/nishn/Downloads/icons8-money-with-wings-48.png"
---

<style>                     
/* Navigation bar: purple background with a white border. */
.navbar {
  background-color: purple;
  border-color: white;
}

/* Navbar brand text: forced to black, overriding the theme colour. */
.navbar-brand {
  color: black !important;
}

</style>  


```{r setup, include=FALSE}
library(flexdashboard)
library(dplyr)
library(ggplot2)
library(DT)
library(plotly)
library(wordcloud2)
library(sf)
library(spData)
```
  
Data Table
===================
```{r}
# Read the raw salary records (path is specific to the author's machine).
data <- read.csv("C:/Users/nishn/Downloads/ds_salaries.csv")

# Expand the coded categorical columns into readable labels in one pass.
# Any code that is not listed below is kept unchanged.
data <- data %>%
  mutate(
    employment_type = case_when(
      employment_type == "FT" ~ "Full Time",
      employment_type == "PT" ~ "Part Time",
      employment_type == "CT" ~ "Contract",
      employment_type == "FL" ~ "Freelance",
      TRUE ~ employment_type
    ),
    experience_level = case_when(
      experience_level == "EN" ~ "Entry Level/Junior",
      experience_level == "MI" ~ "Mid-level/Intermediate",
      experience_level == "SE" ~ "Senior-level/Expert",
      experience_level == "EX" ~ "Executive-level/Director",
      TRUE ~ experience_level
    ),
    company_size = case_when(
      company_size == "L" ~ "Large",
      company_size == "M" ~ "Medium",
      company_size == "S" ~ "Small",
      TRUE ~ company_size
    ),
    # The ratio is converted to text first so it can be relabelled.
    remote_ratio = as.character(remote_ratio),
    remote_ratio = case_when(
      remote_ratio == "0" ~ "No Remote Work",
      remote_ratio == "50" ~ "Partially Remote",
      remote_ratio == "100" ~ "Fully Remote",
      TRUE ~ remote_ratio
    )
  )

# Show the recoded records as an interactive table.
datatable(data)

```

Designation Analysis
======================

Column {data-width=500}
-----------------------------------------------------------------------

### Job Designation and Experience Level

```{r}
# The 15 job titles that appear most often in the data.
top_job_titles <- data %>%
  count(job_title, name = "count") %>%
  arrange(desc(count)) %>%
  slice_head(n = 15) %>%
  pull(job_title)

# Keep only the rows for those 15 titles; the violin plot chunk below
# reuses `filtered_data`.
filtered_data <- data %>%
  filter(job_title %in% top_job_titles)

# Dodged bar chart: number of postings per job title, split by
# experience level.
experience_bar <- ggplot(
  filtered_data,
  aes(x = job_title, fill = experience_level)
) +
  geom_bar(stat = "count", position = "dodge") +
  labs(
    title = " Experience Level vs. Top 15 Job Designations",
    x = "Job Title",
    y = "Count",
    fill = "Experience Level"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5)
  )

# Render as an interactive plotly widget showing every mapped value on hover.
ggplotly(experience_bar, tooltip = "all")
```


### Job Designation - Word Cloud

```{r,fig.width=15}
# One row per record with a unit weight; summing the weights per title
# yields the frequency table that wordcloud2 takes as its data argument.
title_rows <- data.frame(
  text = data$job_title,
  freq = 1,
  stringsAsFactors = FALSE
)
title_freq <- aggregate(freq ~ text, data = title_rows, FUN = sum)
wordcloud2(data = title_freq, size = 5)
```





### Job Designation and Salary

```{r}
# Violin plot of salary for the 15 most common job titles.
# `salary_in_usd` is used instead of `salary`: the raw `salary` column is
# expressed in each record's local currency, so values are not comparable
# across rows. Every other salary chart in this dashboard uses the USD
# column. The former hard 0-1,000,000 axis limit only existed to hide
# large local-currency amounts and is no longer needed.
# Relies on `filtered_data` created in the previous chunk.
violin_plot <- ggplot(
  filtered_data,
  aes(x = job_title, y = salary_in_usd, fill = job_title)
) +
  geom_violin() +
  labs(
    title = "Salary vs. Top 15 Job Titles",
    x = "Job Title",
    y = "Salary (USD)",
    fill = "Job Title"
  ) +
  scale_y_continuous(labels = scales::number_format(scale = 1)) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "right",
    plot.title = element_text(hjust = 0.5)
  )

ggplotly(violin_plot)

```
Salary Analysis
==================

Column {data-width=500}
-----------------------------------------------------------------------
### Salary Distribution
```{r}
# Density curve of USD salaries with a rug of the individual observations
# along the x axis.
salary_density <- ggplot(data, aes(x = salary_in_usd)) +
  geom_density(color = "blue", alpha = 0.7) +
  geom_rug(color = "red") +
  scale_x_continuous(labels = scales::number_format(scale = 1)) +
  labs(title = "Salary Distribution", x = "Salary") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

ggplotly(salary_density)
```



### Salary Distribution based on Company Size
```{r}

# One USD salary density curve per company size.
size_density <- ggplot(
  data,
  aes(x = salary_in_usd, color = company_size)
) +
  geom_density(alpha = 0.7) +
  scale_x_continuous(labels = scales::number_format(scale = 1)) +
  labs(
    title = "Salary Distribution based on Company Size",
    x = "Salary",
    color = "Company Size"
  ) +
  theme_minimal()

# Convert to plotly, then set the title and axis labels on the widget.
ggplotly(size_density) %>%
  layout(
    title = "Salary Distribution based on Company Size",
    xaxis = list(title = "Salary"),
    yaxis = list(title = "Density")
  )
```

Column {data-width=500}
-----------------------------------------------------------------------

### Median Salary by Experience Level and Remote Ratio
```{r}
# Median USD salary for every experience level / remote ratio combination.
# `salary_in_usd` replaces `salary`, which mixes local currencies and makes
# the medians meaningless across countries. `.groups = "drop"` returns an
# ungrouped result and stops dplyr's "grouped output" message from being
# printed into the dashboard panel.
grouped_data1 <- data %>%
  group_by(experience_level, remote_ratio) %>%
  summarise(median_salary = median(salary_in_usd), .groups = "drop")

# Fixed category orders for both axes.
experience_order <- c(
  "Entry Level/Junior",
  "Mid-level/Intermediate",
  "Senior-level/Expert",
  "Executive-level/Director"
)
remote_order <- c("Fully Remote", "Partially Remote", "No Remote Work")

# A heatmap has a colour bar rather than a legend, so the scale title is
# set through colorbar(); the former `legend = list(title = ...)` in
# layout() had no visible effect on it.
plot_ly(
  data = grouped_data1,
  x = ~experience_level,
  y = ~remote_ratio,
  z = ~median_salary,
  type = "heatmap"
) %>%
  colorbar(title = "Median Salary") %>%
  layout(
    title = "Heatmap of Median Salary by Experience Level and Remote Ratio",
    xaxis = list(
      title = "Experience Level",
      categoryorder = "array",
      categoryarray = experience_order
    ),
    yaxis = list(
      title = "Remote Ratio",
      categoryorder = "array",
      categoryarray = remote_order
    )
  )

```

### Salary Distribution based on Experience
```{r}
# One USD salary density curve per experience level.
experience_density <- ggplot(
  data,
  aes(x = salary_in_usd, color = factor(experience_level))
) +
  geom_density(alpha = 0.8) +
  scale_x_continuous(labels = scales::number_format(scale = 1)) +
  labs(
    title = "Salary Distribution based on Experience",
    x = "Salary",
    color = "Experience"
  ) +
  theme_minimal()

# Convert to plotly, then set the title and axis labels on the widget.
ggplotly(experience_density) %>%
  layout(
    title = "Salary Distribution based on Experience",
    xaxis = list(title = "Salary"),
    yaxis = list(title = "Density")
  )
```




Max & Min Salary Analysis
====================
Column {data-width=500}
-----------------------------------------------------------------------
### Median Salary for Top 10 Highest Paid Job Titles

```{r}

# Ten job titles with the highest median USD salary. The bubble chart
# chunk that follows reuses `top_salary`.
top_salary <- data %>%
  group_by(job_title) %>%
  summarise(med_salary = median(salary_in_usd)) %>%
  arrange(desc(med_salary)) %>%
  slice_head(n = 10)

# Bar chart ordered from highest to lowest median; the hover label is
# built as a column first and then mapped to the `text` aesthetic.
top_10 <- top_salary %>%
  mutate(
    hover_label = paste(
      "Job Title: ", job_title,
      "<br>Median Salary: $", round(med_salary, 2)
    )
  ) %>%
  ggplot(
    aes(
      x = reorder(job_title, -med_salary),
      y = med_salary,
      fill = job_title,
      text = hover_label
    )
  ) +
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = scales::number_format(scale = 1)) +
  labs(
    title = "Median Salary for Top 10 Highest Paid Job Titles",
    x = "Job Title",
    y = "Salary"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  )

ggplotly(top_10, tooltip = "text")
```


### Bubble Chart of Job Title v/s Experience Level by Salary (Highest Paid)
```{r}
# Median USD salary per (job title, experience level) for the ten
# highest-paid titles held in `top_salary` (created in the previous chunk).
# semi_join() only filters rows, so the title-level `med_salary` column of
# `top_salary` is no longer dragged in just to be overwritten.
# `.groups = "drop"` returns an ungrouped result and stops dplyr's
# "grouped output" message from being printed into the dashboard panel.
med_salaries_high <- data %>%
  semi_join(top_salary, by = "job_title") %>%
  group_by(job_title, experience_level) %>%
  summarise(med_salary = median(salary_in_usd), .groups = "drop")

# Sets the session-wide default ggplot theme; plots created afterwards
# without an explicit theme inherit it.
theme_set(theme_bw())

# Bubble size encodes the median salary; points are jittered around each
# title / experience-level cell.
g <- ggplot(
  med_salaries_high,
  aes(
    job_title, experience_level,
    text = paste(
      "Job Title:", job_title,
      "<br>Experience Level:", experience_level,
      "<br>Salary:", round(med_salary, 2), "$"
    )
  )
) +
  labs(
    title = "Job Title v/s Experience Level by Salary (Highest Paid)",
    x = "Job Title",
    y = "Experience Level"
  ) +
  geom_jitter(aes(col = experience_level, size = med_salary)) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

ggplotly(g, tooltip = "text")

```



Column {data-width=500}
-----------------------------------------------------------------------
### Median Salary for Top 10 Lowest Paid Job Titles

```{r}
# Ten job titles with the lowest median USD salary. The bubble chart
# chunk that follows reuses `bottom_salary`.
bottom_salary <- data %>%
  group_by(job_title) %>%
  summarise(med_salary = median(salary_in_usd)) %>%
  arrange(med_salary) %>%
  slice_head(n = 10)

# Bar chart ordered from lowest to highest median; the hover label is
# built as a column first and then mapped to the `text` aesthetic.
bottom_10 <- bottom_salary %>%
  mutate(
    hover_label = paste(
      "Job Title: ", job_title,
      "<br>Median Salary: $", round(med_salary, 2)
    )
  ) %>%
  ggplot(
    aes(
      x = reorder(job_title, med_salary),
      y = med_salary,
      fill = job_title,
      text = hover_label
    )
  ) +
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = scales::number_format(scale = 1)) +
  labs(
    title = "Median Salary for Top 10 Lowest Paid Job Titles",
    x = "Job Title",
    y = "Salary"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  )

ggplotly(bottom_10, tooltip = "text")

```


### Bubble Chart of Job Title v/s Experience Level by Salary (Lowest Paid)
```{r}
# Median USD salary per (job title, experience level) for the ten
# lowest-paid titles held in `bottom_salary` (created in the previous chunk).
# semi_join() only filters rows, so the title-level `med_salary` column of
# `bottom_salary` is no longer dragged in just to be overwritten.
# `.groups = "drop"` returns an ungrouped result and stops dplyr's
# "grouped output" message from being printed into the dashboard panel.
med_salaries_low <- data %>%
  semi_join(bottom_salary, by = "job_title") %>%
  group_by(job_title, experience_level) %>%
  summarise(med_salary = median(salary_in_usd), .groups = "drop")

# Sets the session-wide default ggplot theme; plots created afterwards
# without an explicit theme inherit it.
theme_set(theme_bw())

# Bubble size encodes the median salary; points are jittered around each
# title / experience-level cell.
g <- ggplot(
  med_salaries_low,
  aes(
    job_title, experience_level,
    text = paste(
      "Job Title:", job_title,
      "<br>Experience Level:", experience_level,
      "<br>Salary:", round(med_salary, 2), "$"
    )
  )
) +
  labs(
    title = "Job Title v/s Experience Level by Salary (Lowest Paid)",
    x = "Job Title",
    y = "Experience Level"
  ) +
  geom_jitter(aes(col = experience_level, size = med_salary)) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

ggplotly(g, tooltip = "text")
```


Work Year Analysis
==========================
Column {data-width=500}
-----------------------------------------------------------------------
### Scatter Plot of Median Salary by Year
```{r}
# Median USD salary per work year and experience level.
# `.groups = "drop"` returns an ungrouped result and stops dplyr's
# "grouped output" message from being printed into the dashboard panel.
med_salaries <- data %>%
  group_by(work_year, experience_level) %>%
  summarise(med_salary = median(salary_in_usd), .groups = "drop")

# Points joined by one line per experience level. The colour mapping is
# declared once in the top-level aes(), so geom_line() inherits it instead
# of repeating it. No theme is set here: the plot uses whatever default
# theme is active in the session.
scatter_plot <- ggplot(
  med_salaries,
  aes(
    x = work_year,
    y = med_salary,
    color = experience_level,
    group = experience_level,
    text = paste(
      "Year:", work_year,
      "<br>Salary:", round(med_salary, 2), "$",
      "<br>Experience Level:", experience_level
    )
  )
) +
  geom_point(size = 3) +
  geom_line(size = 1) +
  labs(
    title = "Scatter Plot of Median Salary by Year",
    x = "Year",
    y = "Salary"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

ggplotly(scatter_plot, tooltip = "text")
```

### Remote Ratio by Work Year
```{r}
# Share of each remote-work category within every work year.
# count() replaces group_by() + summarise(n()), which printed dplyr's
# "grouped output" message into the dashboard panel; the result is
# ungrouped again once the per-year percentages are computed.
percentage_data <- data %>%
  count(work_year, remote_ratio, name = "count") %>%
  group_by(work_year) %>%
  mutate(percentage = count / sum(count) * 100) %>%
  ungroup()

# Stacked bar chart: one bar per work year, segments are remote categories.
gg <- ggplot(
  percentage_data,
  aes(x = work_year, y = percentage, fill = remote_ratio)
) +
  geom_bar(stat = "identity") +
  labs(
    title = "Remote Ratio by Work Year",
    x = "Work Year",
    y = "Percentage",
    fill = "Remote Ratio"
  ) +
  # Values are already on a 0-100 scale, hence scale = 1.
  scale_y_continuous(labels = scales::percent_format(scale = 1)) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Convert to plotly and show year, category and percentage on hover.
plotly_chart <- ggplotly(
  gg,
  tooltip = c("work_year", "remote_ratio", "percentage")
)

plotly_chart

```


Column {data-width=500}
-----------------------------------------------------------------------
### Percentage Distribution of Work Years
```{r}
# Percentage of all records that fall in each work year.
# The former `average_salaries` object (a grouped copy of `data`) was never
# used by this or any other chunk and has been removed.
percentage_data <- data %>%
  group_by(work_year) %>%
  summarize(count = n()) %>%
  mutate(percentage = count / sum(count) * 100)

# Pie chart of the yearly shares; hover shows the year and its percentage.
plot_ly(
  percentage_data,
  labels = ~work_year,
  values = ~percentage,
  type = "pie",
  hoverinfo = "label+percent"
) %>%
  layout(title = "Percentage Distribution of Work Years")

```


### Company Location by Work Year
```{r}
# The 15 company locations with the most records.
top_locations <- data %>%
  count(company_location, name = "location_count") %>%
  arrange(desc(location_count)) %>%
  head(15)

# ISO alpha-2 code -> country name lookup. The geometry is dropped BEFORE
# joining: the previous code called `st_drop_geometry(geom)` after the
# join, passing an undefined symbol on an object that was no longer `sf`.
# Rows without an ISO code are removed because they can never match.
country_names <- spData::world %>%
  st_drop_geometry() %>%
  select(iso_a2, name_long) %>%
  filter(!is.na(iso_a2))

# Records of the top locations, with the code replaced by the country name
# (the code is kept when the lookup has no entry for it).
ij <- inner_join(top_locations, data, by = "company_location") %>%
  left_join(country_names, by = c("company_location" = "iso_a2")) %>%
  mutate(company_location = coalesce(name_long, company_location)) %>%
  select(-name_long)

# Share of each company location within every work year. count() avoids
# dplyr's "grouped output" message being printed into the dashboard panel.
percentage_data1 <- ij %>%
  count(work_year, company_location, name = "count") %>%
  group_by(work_year) %>%
  mutate(percentage = count / sum(count) * 100) %>%
  ungroup()

# Horizontal stacked bar chart of company location share by work year.
gg <- ggplot(
  percentage_data1,
  aes(x = work_year, y = percentage, fill = company_location)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    title = "Company Location by Work Year",
    x = "Work Year",
    y = "Percentage",
    fill = "Company Location"
  ) +
  # Values are already on a 0-100 scale, hence scale = 1.
  scale_y_continuous(labels = scales::percent_format(scale = 1)) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Convert to plotly and show year, location and percentage on hover.
plotly_chart <- ggplotly(
  gg,
  tooltip = c("work_year", "company_location", "percentage")
)

plotly_chart

```


Location Analysis
=====================

Column {data-width=500}
----------------------------
### Median Salary by Company Location

```{r map, echo=FALSE, results='asis'}

# Country polygons and attributes from spData; kept in `world` because the
# following chunk reads it.
world <- spData::world

# Median USD salary per company location, highest first.
median_salary_by_country <- data %>%
  group_by(company_location) %>%
  summarise(med_salary = median(salary_in_usd)) %>%
  arrange(desc(med_salary))

# ISO alpha-2 code -> country name lookup without geometry. Rows lacking an
# ISO code are removed because they can never match a company location.
country_names <- world %>%
  st_drop_geometry() %>%
  select(iso_a2, name_long) %>%
  filter(!is.na(iso_a2))

# Attach country names to the medians. The earlier merge(all = TRUE) +
# inner_join() round trip produced the same rows but left `name_long` as NA
# for every code missing from `world`, so those bars lost their label.
# Falling back to the ISO code matches how the other location charts in
# this dashboard handle unknown codes.
jmd <- median_salary_by_country %>%
  left_join(country_names, by = c("company_location" = "iso_a2")) %>%
  mutate(name_long = coalesce(name_long, company_location))

# Bar chart ordered from highest to lowest median salary.
plot_ly(
  data = jmd,
  x = ~reorder(name_long, -med_salary),
  y = ~med_salary,
  type = "bar",
  marker = list(color = rainbow(nrow(jmd)))
) %>%
  layout(
    title = "Median Salary by Country",
    xaxis = list(title = "Country"),
    yaxis = list(title = "Median Salary")
  )

```



### Top Employee Residence countries and their company location

```{r}
# The ten employee residence countries with the most records.
emp_data <- data %>%
  count(employee_residence, name = "count") %>%
  arrange(desc(count)) %>%
  head(10)

# For those residences, count the records whose company is located in a
# different country, per (company location, residence) pair.
# semi_join() replaces merge(all = TRUE) followed by an NA filter, and
# count() returns an ungrouped result without dplyr's "grouped output"
# message being printed into the dashboard panel.
dataind <- data %>%
  semi_join(emp_data, by = "employee_residence") %>%
  filter(company_location != employee_residence) %>%
  count(company_location, employee_residence, name = "count")

# ISO alpha-2 code -> country name lookup without geometry. It is built as
# a local object so the shared `world` dataset is no longer overwritten,
# and the stray `st_drop_geometry(geom)` calls (undefined `geom` argument,
# applied to non-sf data) are gone.
country_names <- spData::world %>%
  st_drop_geometry() %>%
  select(iso_a2, name_long) %>%
  filter(!is.na(iso_a2))

# Replace both code columns with country names, keeping the code when the
# lookup has no entry for it.
dataind <- dataind %>%
  left_join(country_names, by = c("company_location" = "iso_a2")) %>%
  mutate(company_location = coalesce(name_long, company_location)) %>%
  select(-name_long) %>%
  left_join(country_names, by = c("employee_residence" = "iso_a2")) %>%
  mutate(employee_residence = coalesce(name_long, employee_residence)) %>%
  select(-name_long)

# Stacked bar chart: one bar per residence, segments are company locations.
# `fill` maps the bare column name; `dataind$company_location` inside aes()
# bypasses ggplot2's data masking and triggers a warning.
p <- ggplot(
  dataind,
  aes(
    x = reorder(employee_residence, -count),
    y = count,
    fill = company_location,
    text = paste("Company Location:", company_location, "<br>Count:", count)
  )
) +
  geom_bar(stat = "identity") +
  labs(
    title = "Stacked Bar Chart of Counts by Employee Residence and Company Location",
    x = "Employee Residence",
    y = "Count",
    fill = "Company Location"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Convert to plotly, showing only the custom hover text.
p <- ggplotly(p, tooltip = "text")

p
```